{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "from collections import Counter\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "import pandas_profiling as ppf"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "####  ================================== step_1/5 读取数据时将ID作为index ========================================"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# base信息\n",
    "base_train_sum=pd.read_csv(r'./data/base_train_sum.csv',encoding='gb2312').reset_index(drop=True).set_index('ID')\n",
    "base_varify=pd.read_csv(r'./data/base_verify1.csv',encoding='gb2312').reset_index(drop=True).set_index('ID')\n",
    "knowledge_train=pd.read_csv(r'./data/knowledge_train_sum.csv',encoding='gb2312').reset_index(drop=True).set_index('ID')\n",
    "paient_information_verify1=pd.read_csv(r'./data/paient_information_verify1.csv',encoding='gb2312').reset_index(drop=True).set_index('ID')\n",
    "# money信息\n",
    "money_report_train_sum=pd.read_csv(r'./data/money_report_train_sum.csv',encoding='gb2312').reset_index(drop=True).set_index('ID')\n",
    "money_information_verify1=pd.read_csv(r'./data/money_information_verify1.csv',encoding='gb2312').reset_index(drop=True).set_index('ID')\n",
    "year_report_train_sum=pd.read_csv(r'./data/year_report_train_sum.csv',encoding='gb2312').reset_index(drop=True).set_index('ID')\n",
    "year_report_verify1=pd.read_csv(r'./data/year_report_verify1.csv',encoding='gb2312').reset_index(drop=True).set_index('ID')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "####  ========================================= step_2/5 数据合并 =============================================="
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 基本信息的数据\n",
    "no_year=pd.concat([pd.concat([base_train_sum,base_varify],sort=True).drop(columns='控制人ID'), pd.concat([knowledge_train,paient_information_verify1],sort=True)],axis=1,sort=True)\n",
    "# no_year"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 三年的有关money的数据\n",
    "three_year=pd.concat([pd.concat([money_report_train_sum,money_information_verify1]),pd.concat([year_report_train_sum,year_report_verify1]).drop(columns='year')],axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "####  ================================== step_3/5 对money的three_year处理 ========================================"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 137802 entries, 28 to 2490465\n",
      "Data columns (total 18 columns):\n",
      "year           137802 non-null float64\n",
      "主营业务收入         136444 non-null float64\n",
      "从业人数           136477 non-null float64\n",
      "债权融资成本         136418 non-null float64\n",
      "债权融资额度         136464 non-null float64\n",
      "内部融资和贸易融资成本    136410 non-null float64\n",
      "内部融资和贸易融资额度    136484 non-null float64\n",
      "净利润            136369 non-null float64\n",
      "利润总额           136471 non-null float64\n",
      "所有者权益合计        136418 non-null float64\n",
      "纳税总额           136487 non-null float64\n",
      "股权融资成本         136414 non-null float64\n",
      "股权融资额度         136450 non-null float64\n",
      "营业总收入          136382 non-null float64\n",
      "负债总额           136482 non-null float64\n",
      "资产总额           136345 non-null float64\n",
      "项目融资和政策融资成本    136392 non-null float64\n",
      "项目融资和政策融资额度    136406 non-null float64\n",
      "dtypes: float64(18)\n",
      "memory usage: 20.0 MB\n"
     ]
    }
   ],
   "source": [
    "# 把‘year’补充完整,此步较慢，可以保存文件至three_year\n",
    "pp=[]\n",
    "for i in list(set(three_year.index)):\n",
    "    a=three_year[three_year.index==i]['year'].iloc[0]\n",
    "    b=three_year[three_year.index==i]['year'].iloc[1]\n",
    "    c=three_year[three_year.index==i]['year'].iloc[2]\n",
    "    year=[2015,2016,2017]\n",
    "    real=[a,b,c]\n",
    "    if set(year)==set(real):\n",
    "        continue\n",
    "    else:\n",
    "        pp.append(i)\n",
    "# three_year.to_csv('./three_year.csv')\n",
    "df=three_year[three_year.index==pp[0]]\n",
    "for i in range(1,len(pp)):\n",
    "    df=pd.concat([three_year[three_year.index==pp[i]],df])\n",
    "    \n",
    "df['year']=[2015.,2016.,2017.]*int(df.shape[0]/3)\n",
    "# three_year=three_year.combine_first(df)  index唯一时可以用，这里不能用\n",
    "three_year.drop(index=[i for i in pp],inplace=True)\n",
    "three_year=pd.concat([three_year,df],sort=True)\n",
    "three_year.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "45931"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 三年数据分别处理，留下三年ID的交集\n",
    "money_2015=three_year[three_year['year']==2015].drop(['year'],axis=1)\n",
    "money_2016=three_year[three_year['year']==2016].drop(['year'],axis=1)\n",
    "money_2017=three_year[three_year['year']==2017].drop(['year'],axis=1)\n",
    "# inner0=list(set(money_2015.index).intersection(set(money_2016.index)).intersection(set(money_2017.index)))  \n",
    "inner=list(set(no_year.index).intersection(set(money_2015.index)))                                                    # base的交集\n",
    "len(inner)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(45931, 17), (45931, 17), (45931, 17)]\n"
     ]
    }
   ],
   "source": [
    "# 所有空值暂且用平均数填充（所有者权益总额=资产总额-负债总额）\n",
    "lt=[]  # 记录每一年的数据的形状\n",
    "for money in [money_2015,money_2016,money_2017]:\n",
    "    mean=money.describe()\n",
    "    values={money.columns[0]:mean[mean.columns[0]][1], money.columns[1]:mean[mean.columns[1]][1], money.columns[2]:mean[mean.columns[2]][1], money.columns[3]:mean[mean.columns[3]][1],\n",
    "           money.columns[4]:mean[mean.columns[4]][1],money.columns[5]:mean[mean.columns[5]][1],money.columns[6]:mean[mean.columns[6]][1],money.columns[7]:mean[mean.columns[7]][1],\n",
    "           money.columns[8]:mean[mean.columns[8]][1],money.columns[9]:mean[mean.columns[9]][1],money.columns[10]:mean[mean.columns[10]][1],money.columns[11]:mean[mean.columns[11]][1],\n",
    "           money.columns[12]:mean[mean.columns[12]][1],money.columns[13]:mean[mean.columns[13]][1],money.columns[14]:mean[mean.columns[14]][1],money.columns[15]:mean[mean.columns[15]][1],\n",
    "           money.columns[16]:mean[mean.columns[16]][1]}\n",
    "    money.fillna(value=values,inplace=True)\n",
    "    money['所有者权益合计']=money['资产总额']-money['负债总额']\n",
    "#     舍弃交集外的ID\n",
    "    for index in money.index:\n",
    "        if index in inner:\n",
    "            continue\n",
    "        else:\n",
    "            money.drop(index,axis=0,inplace=True) # inplace指定原地操作\n",
    "    lt.append(money.shape)\n",
    "#     对每一年的数据每一列标准化(x-mu)/std\n",
    "    for i in range(money.shape[1]):\n",
    "        money[money.columns[i]]=(money[money.columns[i]] - mean[mean.columns[i]][1])/mean[mean.columns[i]][2]\n",
    "print(lt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>主营业务收入</th>\n",
       "      <th>从业人数</th>\n",
       "      <th>债权融资成本</th>\n",
       "      <th>债权融资额度</th>\n",
       "      <th>内部融资和贸易融资成本</th>\n",
       "      <th>内部融资和贸易融资额度</th>\n",
       "      <th>净利润</th>\n",
       "      <th>利润总额</th>\n",
       "      <th>所有者权益合计</th>\n",
       "      <th>纳税总额</th>\n",
       "      <th>股权融资成本</th>\n",
       "      <th>股权融资额度</th>\n",
       "      <th>营业总收入</th>\n",
       "      <th>负债总额</th>\n",
       "      <th>资产总额</th>\n",
       "      <th>项目融资和政策融资成本</th>\n",
       "      <th>项目融资和政策融资额度</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>-0.384416</td>\n",
       "      <td>0.063669</td>\n",
       "      <td>-0.376463</td>\n",
       "      <td>-0.376167</td>\n",
       "      <td>-0.226258</td>\n",
       "      <td>-0.226812</td>\n",
       "      <td>-0.391587</td>\n",
       "      <td>-0.247471</td>\n",
       "      <td>-0.004741</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>0.406379</td>\n",
       "      <td>0.405600</td>\n",
       "      <td>-0.338439</td>\n",
       "      <td>-0.399303</td>\n",
       "      <td>-0.584048</td>\n",
       "      <td>-0.340293</td>\n",
       "      <td>-0.340049</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>230</th>\n",
       "      <td>-0.505247</td>\n",
       "      <td>-0.102344</td>\n",
       "      <td>-0.376463</td>\n",
       "      <td>-0.376167</td>\n",
       "      <td>-0.131425</td>\n",
       "      <td>-0.132086</td>\n",
       "      <td>-0.305196</td>\n",
       "      <td>-0.402976</td>\n",
       "      <td>0.106861</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>-0.232299</td>\n",
       "      <td>-0.232355</td>\n",
       "      <td>-0.547484</td>\n",
       "      <td>-0.371525</td>\n",
       "      <td>-0.454299</td>\n",
       "      <td>-0.284360</td>\n",
       "      <td>-0.284125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>429</th>\n",
       "      <td>1.398089</td>\n",
       "      <td>-0.215127</td>\n",
       "      <td>0.422401</td>\n",
       "      <td>0.422863</td>\n",
       "      <td>-0.325154</td>\n",
       "      <td>-0.325293</td>\n",
       "      <td>-1.181800</td>\n",
       "      <td>0.637125</td>\n",
       "      <td>-0.461944</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>1.593755</td>\n",
       "      <td>1.592084</td>\n",
       "      <td>1.565990</td>\n",
       "      <td>1.380232</td>\n",
       "      <td>1.632408</td>\n",
       "      <td>-0.340293</td>\n",
       "      <td>-0.340049</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>727</th>\n",
       "      <td>1.047925</td>\n",
       "      <td>0.355541</td>\n",
       "      <td>-0.376463</td>\n",
       "      <td>-0.376167</td>\n",
       "      <td>0.276225</td>\n",
       "      <td>0.274462</td>\n",
       "      <td>-1.224502</td>\n",
       "      <td>0.836748</td>\n",
       "      <td>-1.434351</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>2.129373</td>\n",
       "      <td>2.129724</td>\n",
       "      <td>1.686916</td>\n",
       "      <td>2.114191</td>\n",
       "      <td>1.910589</td>\n",
       "      <td>-0.340293</td>\n",
       "      <td>-0.340049</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1137</th>\n",
       "      <td>-0.505274</td>\n",
       "      <td>-0.323164</td>\n",
       "      <td>-0.024595</td>\n",
       "      <td>-0.024152</td>\n",
       "      <td>-0.213408</td>\n",
       "      <td>-0.214017</td>\n",
       "      <td>-0.345021</td>\n",
       "      <td>-0.277656</td>\n",
       "      <td>0.393514</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>-0.285456</td>\n",
       "      <td>-0.285470</td>\n",
       "      <td>-0.451257</td>\n",
       "      <td>-0.362914</td>\n",
       "      <td>-0.215609</td>\n",
       "      <td>-0.236656</td>\n",
       "      <td>-0.236361</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2884215</th>\n",
       "      <td>0.496968</td>\n",
       "      <td>-0.086906</td>\n",
       "      <td>0.670164</td>\n",
       "      <td>0.670899</td>\n",
       "      <td>-0.325154</td>\n",
       "      <td>-0.325293</td>\n",
       "      <td>-0.785107</td>\n",
       "      <td>0.233498</td>\n",
       "      <td>0.415272</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>-0.285456</td>\n",
       "      <td>-0.285470</td>\n",
       "      <td>0.608267</td>\n",
       "      <td>0.040525</td>\n",
       "      <td>0.390140</td>\n",
       "      <td>0.676944</td>\n",
       "      <td>0.677640</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3015090</th>\n",
       "      <td>0.402190</td>\n",
       "      <td>0.416576</td>\n",
       "      <td>0.161384</td>\n",
       "      <td>0.161671</td>\n",
       "      <td>1.321758</td>\n",
       "      <td>1.317170</td>\n",
       "      <td>-0.902459</td>\n",
       "      <td>0.552677</td>\n",
       "      <td>-0.623410</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>0.029675</td>\n",
       "      <td>0.030519</td>\n",
       "      <td>0.891865</td>\n",
       "      <td>0.680957</td>\n",
       "      <td>0.487089</td>\n",
       "      <td>-0.340293</td>\n",
       "      <td>-0.340049</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5991919</th>\n",
       "      <td>-0.696946</td>\n",
       "      <td>0.087269</td>\n",
       "      <td>-0.376463</td>\n",
       "      <td>-0.376167</td>\n",
       "      <td>-0.241054</td>\n",
       "      <td>-0.241419</td>\n",
       "      <td>-0.149531</td>\n",
       "      <td>-0.589776</td>\n",
       "      <td>0.216414</td>\n",
       "      <td>-0.369505</td>\n",
       "      <td>-0.258391</td>\n",
       "      <td>-0.258332</td>\n",
       "      <td>-0.708450</td>\n",
       "      <td>-0.719911</td>\n",
       "      <td>-0.870371</td>\n",
       "      <td>-0.305747</td>\n",
       "      <td>-0.305487</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1048810</th>\n",
       "      <td>-0.618820</td>\n",
       "      <td>0.301719</td>\n",
       "      <td>-0.376463</td>\n",
       "      <td>-0.376167</td>\n",
       "      <td>-0.325154</td>\n",
       "      <td>-0.325293</td>\n",
       "      <td>-0.277315</td>\n",
       "      <td>-0.472247</td>\n",
       "      <td>-0.063633</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>-0.026691</td>\n",
       "      <td>-0.026910</td>\n",
       "      <td>-0.611293</td>\n",
       "      <td>-0.351403</td>\n",
       "      <td>-0.561363</td>\n",
       "      <td>0.054895</td>\n",
       "      <td>0.055183</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2490465</th>\n",
       "      <td>-0.095199</td>\n",
       "      <td>-0.047849</td>\n",
       "      <td>0.082037</td>\n",
       "      <td>0.082428</td>\n",
       "      <td>0.202348</td>\n",
       "      <td>0.203352</td>\n",
       "      <td>-0.491845</td>\n",
       "      <td>-0.148106</td>\n",
       "      <td>-0.284044</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>0.162491</td>\n",
       "      <td>0.063230</td>\n",
       "      <td>-0.094720</td>\n",
       "      <td>0.030941</td>\n",
       "      <td>-0.179931</td>\n",
       "      <td>-0.340293</td>\n",
       "      <td>-0.223652</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>45931 rows × 17 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           主营业务收入      从业人数    债权融资成本    债权融资额度  内部融资和贸易融资成本  内部融资和贸易融资额度  \\\n",
       "ID                                                                          \n",
       "28      -0.384416  0.063669 -0.376463 -0.376167    -0.226258    -0.226812   \n",
       "230     -0.505247 -0.102344 -0.376463 -0.376167    -0.131425    -0.132086   \n",
       "429      1.398089 -0.215127  0.422401  0.422863    -0.325154    -0.325293   \n",
       "727      1.047925  0.355541 -0.376463 -0.376167     0.276225     0.274462   \n",
       "1137    -0.505274 -0.323164 -0.024595 -0.024152    -0.213408    -0.214017   \n",
       "...           ...       ...       ...       ...          ...          ...   \n",
       "2884215  0.496968 -0.086906  0.670164  0.670899    -0.325154    -0.325293   \n",
       "3015090  0.402190  0.416576  0.161384  0.161671     1.321758     1.317170   \n",
       "5991919 -0.696946  0.087269 -0.376463 -0.376167    -0.241054    -0.241419   \n",
       "1048810 -0.618820  0.301719 -0.376463 -0.376167    -0.325154    -0.325293   \n",
       "2490465 -0.095199 -0.047849  0.082037  0.082428     0.202348     0.203352   \n",
       "\n",
       "              净利润      利润总额   所有者权益合计      纳税总额    股权融资成本    股权融资额度     营业总收入  \\\n",
       "ID                                                                              \n",
       "28      -0.391587 -0.247471 -0.004741 -0.463019  0.406379  0.405600 -0.338439   \n",
       "230     -0.305196 -0.402976  0.106861 -0.463019 -0.232299 -0.232355 -0.547484   \n",
       "429     -1.181800  0.637125 -0.461944 -0.463019  1.593755  1.592084  1.565990   \n",
       "727     -1.224502  0.836748 -1.434351 -0.463019  2.129373  2.129724  1.686916   \n",
       "1137    -0.345021 -0.277656  0.393514 -0.463019 -0.285456 -0.285470 -0.451257   \n",
       "...           ...       ...       ...       ...       ...       ...       ...   \n",
       "2884215 -0.785107  0.233498  0.415272 -0.463019 -0.285456 -0.285470  0.608267   \n",
       "3015090 -0.902459  0.552677 -0.623410 -0.463019  0.029675  0.030519  0.891865   \n",
       "5991919 -0.149531 -0.589776  0.216414 -0.369505 -0.258391 -0.258332 -0.708450   \n",
       "1048810 -0.277315 -0.472247 -0.063633 -0.463019 -0.026691 -0.026910 -0.611293   \n",
       "2490465 -0.491845 -0.148106 -0.284044 -0.463019  0.162491  0.063230 -0.094720   \n",
       "\n",
       "             负债总额      资产总额  项目融资和政策融资成本  项目融资和政策融资额度  \n",
       "ID                                                     \n",
       "28      -0.399303 -0.584048    -0.340293    -0.340049  \n",
       "230     -0.371525 -0.454299    -0.284360    -0.284125  \n",
       "429      1.380232  1.632408    -0.340293    -0.340049  \n",
       "727      2.114191  1.910589    -0.340293    -0.340049  \n",
       "1137    -0.362914 -0.215609    -0.236656    -0.236361  \n",
       "...           ...       ...          ...          ...  \n",
       "2884215  0.040525  0.390140     0.676944     0.677640  \n",
       "3015090  0.680957  0.487089    -0.340293    -0.340049  \n",
       "5991919 -0.719911 -0.870371    -0.305747    -0.305487  \n",
       "1048810 -0.351403 -0.561363     0.054895     0.055183  \n",
       "2490465  0.030941 -0.179931    -0.340293    -0.223652  \n",
       "\n",
       "[45931 rows x 17 columns]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 三年的数据暂且取平均\n",
    "average=(money_2015+money_2016+money_2017)/3\n",
    "average"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "####  ========================================= step_4/5 对base的no_year处理 ======================================="
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "for index in no_year.index:\n",
    "        if index in inner:\n",
    "            continue\n",
    "        else:\n",
    "            no_year.drop(index,axis=0,inplace=True) # inplace指定原地操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Float64Index: 45931 entries, 28.0 to 5999999.0\n",
      "Data columns (total 11 columns):\n",
      "flag       35648 non-null float64\n",
      "企业类型       45469 non-null object\n",
      "区域         45506 non-null object\n",
      "控制人持股比例    45477 non-null float64\n",
      "控制人类型      45425 non-null object\n",
      "注册时间       45444 non-null float64\n",
      "注册资本       45467 non-null float64\n",
      "行业         45511 non-null object\n",
      "专利         45472 non-null float64\n",
      "商标         45457 non-null float64\n",
      "著作权        45496 non-null float64\n",
      "dtypes: float64(7), object(4)\n",
      "memory usage: 4.2+ MB\n"
     ]
    }
   ],
   "source": [
    "no_year.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 以众数填充\n",
    "lt2=[]\n",
    "No_year=no_year.drop(columns='flag')\n",
    "for i in No_year.columns:\n",
    "    lt2.append(list(Counter(no_year[i]).most_common(1)[0])[0])\n",
    "\n",
    "values={'企业类型':lt2[0],'区域':lt2[1],'控制人持股比例':lt2[2],'控制人类型':lt2[3],'注册时间':lt2[4],\n",
    "        '注册资本':lt2[5],'行业':lt2[6],'专利':lt2[7],'商标':lt2[8],'著作权':lt2[9]}\n",
    "No_year.fillna(value=values,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "no_year=no_year.combine_first(No_year)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 注册时间以2000年为元年，注册资本标准化(x-mu)/std\n",
    "no_year['注册时间']=no_year['注册时间']-2000\n",
    "no_year['注册资本']=(no_year['注册资本']-no_year['注册资本'].mean())/no_year['注册资本'].std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Counter({'交通运输业': 8114, '商业服务业': 7659, '服务业': 7597, '社区服务': 7529, '工业': 7522, '零售业': 7510})\n",
      "Counter({0: 8114, 1: 7659, 3: 7597, 4: 7529, 2: 7522, 5: 7510})\n",
      "Counter({'农民专业合作社': 9574, '有限责任公司': 9106, '合伙企业': 9105, '集体所有制企业': 9099, '股份有限公司': 9047})\n",
      "Counter({0: 9574, 2: 9106, 1: 9105, 4: 9099, 3: 9047})\n",
      "Counter({'企业法人': 23279, '自然人': 22652})\n",
      "Counter({0: 23279, 1: 22652})\n",
      "Counter({'江西': 7039, '广西': 6605, '福建': 6508, '山东': 6460, '广东': 6453, '湖北': 6450, '湖南': 6416})\n",
      "Counter({3: 7039, 2: 6605, 6: 6508, 0: 6460, 1: 6453, 4: 6450, 5: 6416})\n"
     ]
    }
   ],
   "source": [
    "# 对文本数据编码\n",
    "usa=no_year[['行业','企业类型','控制人类型','区域']]\n",
    "for col in ['行业','企业类型','控制人类型','区域']:\n",
    "    no_year[col] = LabelEncoder().fit_transform(no_year[col])\n",
    "    print(Counter(usa[col]))\n",
    "    print(Counter(no_year[col]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###### 行业\n",
    "商业服务业：1 ；服务业：3 ；社区服务：4 ；零售业：5 ；工业：2 ；交通运输业:0\n",
    "###### 区域\n",
    "福建：   6  ；广西：2  ； 江西：  3  ； 山东：    0  ；湖北： 4  ；湖南：5 ；广东：1 \n",
    "###### 企业类型\n",
    "农民专业合作社：0；股份有限公司：3；有限责任公司：2；集体所有制企业：4；合伙企业：1\n",
    "###### 控制人类型\n",
    "自然人:1     ;企业法人:0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>flag</th>\n",
       "      <th>专利</th>\n",
       "      <th>企业类型</th>\n",
       "      <th>区域</th>\n",
       "      <th>商标</th>\n",
       "      <th>控制人持股比例</th>\n",
       "      <th>控制人类型</th>\n",
       "      <th>注册时间</th>\n",
       "      <th>注册资本</th>\n",
       "      <th>著作权</th>\n",
       "      <th>行业</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>28.0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.56</td>\n",
       "      <td>0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>-1.036705</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>230.0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>-0.577344</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>429.0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.75</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.635303</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>693.0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.98</td>\n",
       "      <td>0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>1.175942</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>727.0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.54</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.302179</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5999995.0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.83</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.628290</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5999996.0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.93</td>\n",
       "      <td>1</td>\n",
       "      <td>11.0</td>\n",
       "      <td>-0.994626</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5999997.0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.73</td>\n",
       "      <td>0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>1.410882</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5999998.0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.64</td>\n",
       "      <td>1</td>\n",
       "      <td>14.0</td>\n",
       "      <td>-0.174088</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5999999.0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.80</td>\n",
       "      <td>1</td>\n",
       "      <td>14.0</td>\n",
       "      <td>1.445948</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>45931 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           flag   专利  企业类型  区域   商标  控制人持股比例  控制人类型  注册时间      注册资本  著作权  行业\n",
       "ID                                                                          \n",
       "28.0        1.0  0.0     0   6  1.0     0.56      0   7.0 -1.036705  1.0   0\n",
       "230.0       1.0  0.0     0   1  0.0     1.00      0   8.0 -0.577344  0.0   3\n",
       "429.0       1.0  1.0     4   3  0.0     0.75      1   5.0  1.635303  0.0   2\n",
       "693.0       1.0  0.0     3   0  0.0     0.98      0  11.0  1.175942  0.0   4\n",
       "727.0       1.0  0.0     3   3  0.0     0.54      0   1.0  1.302179  0.0   5\n",
       "...         ...  ...   ...  ..  ...      ...    ...   ...       ...  ...  ..\n",
       "5999995.0   0.0  0.0     1   5  1.0     0.83      0   0.0  1.628290  1.0   2\n",
       "5999996.0   0.0  1.0     0   4  1.0     0.93      1  11.0 -0.994626  0.0   5\n",
       "5999997.0   0.0  1.0     4   6  1.0     0.73      0  13.0  1.410882  0.0   0\n",
       "5999998.0   0.0  0.0     3   5  0.0     0.64      1  14.0 -0.174088  0.0   3\n",
       "5999999.0   0.0  1.0     3   6  1.0     0.80      1  14.0  1.445948  1.0   0\n",
       "\n",
       "[45931 rows x 11 columns]"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "no_year"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### =================================== step_5/5 合并base与money =============================================="
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Float64Index: 45931 entries, 28.0 to 5999999.0\n",
      "Data columns (total 28 columns):\n",
      "flag           35648 non-null float64\n",
      "专利             45931 non-null float64\n",
      "企业类型           45931 non-null int32\n",
      "区域             45931 non-null int32\n",
      "商标             45931 non-null float64\n",
      "控制人持股比例        45931 non-null float64\n",
      "控制人类型          45931 non-null int32\n",
      "注册时间           45931 non-null float64\n",
      "注册资本           45931 non-null float64\n",
      "著作权            45931 non-null float64\n",
      "行业             45931 non-null int32\n",
      "主营业务收入         45931 non-null float64\n",
      "从业人数           45931 non-null float64\n",
      "债权融资成本         45931 non-null float64\n",
      "债权融资额度         45931 non-null float64\n",
      "内部融资和贸易融资成本    45931 non-null float64\n",
      "内部融资和贸易融资额度    45931 non-null float64\n",
      "净利润            45931 non-null float64\n",
      "利润总额           45931 non-null float64\n",
      "所有者权益合计        45931 non-null float64\n",
      "纳税总额           45931 non-null float64\n",
      "股权融资成本         45931 non-null float64\n",
      "股权融资额度         45931 non-null float64\n",
      "营业总收入          45931 non-null float64\n",
      "负债总额           45931 non-null float64\n",
      "资产总额           45931 non-null float64\n",
      "项目融资和政策融资成本    45931 non-null float64\n",
      "项目融资和政策融资额度    45931 non-null float64\n",
      "dtypes: float64(24), int32(4)\n",
      "memory usage: 9.5 MB\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>flag</th>\n",
       "      <th>专利</th>\n",
       "      <th>企业类型</th>\n",
       "      <th>区域</th>\n",
       "      <th>商标</th>\n",
       "      <th>控制人持股比例</th>\n",
       "      <th>控制人类型</th>\n",
       "      <th>注册时间</th>\n",
       "      <th>注册资本</th>\n",
       "      <th>著作权</th>\n",
       "      <th>...</th>\n",
       "      <th>利润总额</th>\n",
       "      <th>所有者权益合计</th>\n",
       "      <th>纳税总额</th>\n",
       "      <th>股权融资成本</th>\n",
       "      <th>股权融资额度</th>\n",
       "      <th>营业总收入</th>\n",
       "      <th>负债总额</th>\n",
       "      <th>资产总额</th>\n",
       "      <th>项目融资和政策融资成本</th>\n",
       "      <th>项目融资和政策融资额度</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>28.0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.56</td>\n",
       "      <td>0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>-1.036705</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.247471</td>\n",
       "      <td>-0.004741</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>0.406379</td>\n",
       "      <td>0.405600</td>\n",
       "      <td>-0.338439</td>\n",
       "      <td>-0.399303</td>\n",
       "      <td>-0.584048</td>\n",
       "      <td>-0.340293</td>\n",
       "      <td>-0.340049</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>230.0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>-0.577344</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.402976</td>\n",
       "      <td>0.106861</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>-0.232299</td>\n",
       "      <td>-0.232355</td>\n",
       "      <td>-0.547484</td>\n",
       "      <td>-0.371525</td>\n",
       "      <td>-0.454299</td>\n",
       "      <td>-0.284360</td>\n",
       "      <td>-0.284125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>429.0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.75</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.635303</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.637125</td>\n",
       "      <td>-0.461944</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>1.593755</td>\n",
       "      <td>1.592084</td>\n",
       "      <td>1.565990</td>\n",
       "      <td>1.380232</td>\n",
       "      <td>1.632408</td>\n",
       "      <td>-0.340293</td>\n",
       "      <td>-0.340049</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>693.0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.98</td>\n",
       "      <td>0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>1.175942</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.199842</td>\n",
       "      <td>0.356249</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>0.278852</td>\n",
       "      <td>0.278391</td>\n",
       "      <td>0.119368</td>\n",
       "      <td>-0.410640</td>\n",
       "      <td>-0.309380</td>\n",
       "      <td>0.295891</td>\n",
       "      <td>0.296041</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>727.0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.54</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.302179</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.836748</td>\n",
       "      <td>-1.434351</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>2.129373</td>\n",
       "      <td>2.129724</td>\n",
       "      <td>1.686916</td>\n",
       "      <td>2.114191</td>\n",
       "      <td>1.910589</td>\n",
       "      <td>-0.340293</td>\n",
       "      <td>-0.340049</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5999995.0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.83</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.628290</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.231584</td>\n",
       "      <td>1.261654</td>\n",
       "      <td>1.235796</td>\n",
       "      <td>0.248468</td>\n",
       "      <td>0.247847</td>\n",
       "      <td>0.656345</td>\n",
       "      <td>0.435555</td>\n",
       "      <td>1.665682</td>\n",
       "      <td>0.003934</td>\n",
       "      <td>0.004127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5999996.0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.93</td>\n",
       "      <td>1</td>\n",
       "      <td>11.0</td>\n",
       "      <td>-0.994626</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.581308</td>\n",
       "      <td>0.239991</td>\n",
       "      <td>-0.356314</td>\n",
       "      <td>-0.176726</td>\n",
       "      <td>-0.176689</td>\n",
       "      <td>-0.696066</td>\n",
       "      <td>-0.730380</td>\n",
       "      <td>-0.866149</td>\n",
       "      <td>-0.340293</td>\n",
       "      <td>-0.340049</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5999997.0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.73</td>\n",
       "      <td>0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>1.410882</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.303538</td>\n",
       "      <td>0.254127</td>\n",
       "      <td>0.852716</td>\n",
       "      <td>-0.285456</td>\n",
       "      <td>-0.285470</td>\n",
       "      <td>0.119244</td>\n",
       "      <td>-0.294231</td>\n",
       "      <td>-0.224891</td>\n",
       "      <td>-0.340293</td>\n",
       "      <td>-0.340049</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5999998.0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.64</td>\n",
       "      <td>1</td>\n",
       "      <td>14.0</td>\n",
       "      <td>-0.174088</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.511365</td>\n",
       "      <td>0.325045</td>\n",
       "      <td>-0.399178</td>\n",
       "      <td>-0.127856</td>\n",
       "      <td>-0.128049</td>\n",
       "      <td>-0.625198</td>\n",
       "      <td>-0.660332</td>\n",
       "      <td>-0.695741</td>\n",
       "      <td>-0.324205</td>\n",
       "      <td>-0.323964</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5999999.0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.80</td>\n",
       "      <td>1</td>\n",
       "      <td>14.0</td>\n",
       "      <td>1.445948</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.570797</td>\n",
       "      <td>0.268431</td>\n",
       "      <td>-0.369331</td>\n",
       "      <td>-0.164113</td>\n",
       "      <td>-0.163797</td>\n",
       "      <td>-0.681008</td>\n",
       "      <td>-0.532487</td>\n",
       "      <td>-0.556387</td>\n",
       "      <td>-0.255810</td>\n",
       "      <td>-0.255525</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>45931 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           flag   专利  企业类型  区域   商标  控制人持股比例  控制人类型  注册时间      注册资本  著作权  ...  \\\n",
       "ID                                                                        ...   \n",
       "28.0        1.0  0.0     0   6  1.0     0.56      0   7.0 -1.036705  1.0  ...   \n",
       "230.0       1.0  0.0     0   1  0.0     1.00      0   8.0 -0.577344  0.0  ...   \n",
       "429.0       1.0  1.0     4   3  0.0     0.75      1   5.0  1.635303  0.0  ...   \n",
       "693.0       1.0  0.0     3   0  0.0     0.98      0  11.0  1.175942  0.0  ...   \n",
       "727.0       1.0  0.0     3   3  0.0     0.54      0   1.0  1.302179  0.0  ...   \n",
       "...         ...  ...   ...  ..  ...      ...    ...   ...       ...  ...  ...   \n",
       "5999995.0   0.0  0.0     1   5  1.0     0.83      0   0.0  1.628290  1.0  ...   \n",
       "5999996.0   0.0  1.0     0   4  1.0     0.93      1  11.0 -0.994626  0.0  ...   \n",
       "5999997.0   0.0  1.0     4   6  1.0     0.73      0  13.0  1.410882  0.0  ...   \n",
       "5999998.0   0.0  0.0     3   5  0.0     0.64      1  14.0 -0.174088  0.0  ...   \n",
       "5999999.0   0.0  1.0     3   6  1.0     0.80      1  14.0  1.445948  1.0  ...   \n",
       "\n",
       "               利润总额   所有者权益合计      纳税总额    股权融资成本    股权融资额度     营业总收入  \\\n",
       "ID                                                                      \n",
       "28.0      -0.247471 -0.004741 -0.463019  0.406379  0.405600 -0.338439   \n",
       "230.0     -0.402976  0.106861 -0.463019 -0.232299 -0.232355 -0.547484   \n",
       "429.0      0.637125 -0.461944 -0.463019  1.593755  1.592084  1.565990   \n",
       "693.0      0.199842  0.356249 -0.463019  0.278852  0.278391  0.119368   \n",
       "727.0      0.836748 -1.434351 -0.463019  2.129373  2.129724  1.686916   \n",
       "...             ...       ...       ...       ...       ...       ...   \n",
       "5999995.0  1.231584  1.261654  1.235796  0.248468  0.247847  0.656345   \n",
       "5999996.0 -0.581308  0.239991 -0.356314 -0.176726 -0.176689 -0.696066   \n",
       "5999997.0  0.303538  0.254127  0.852716 -0.285456 -0.285470  0.119244   \n",
       "5999998.0 -0.511365  0.325045 -0.399178 -0.127856 -0.128049 -0.625198   \n",
       "5999999.0 -0.570797  0.268431 -0.369331 -0.164113 -0.163797 -0.681008   \n",
       "\n",
       "               负债总额      资产总额  项目融资和政策融资成本  项目融资和政策融资额度  \n",
       "ID                                                       \n",
       "28.0      -0.399303 -0.584048    -0.340293    -0.340049  \n",
       "230.0     -0.371525 -0.454299    -0.284360    -0.284125  \n",
       "429.0      1.380232  1.632408    -0.340293    -0.340049  \n",
       "693.0     -0.410640 -0.309380     0.295891     0.296041  \n",
       "727.0      2.114191  1.910589    -0.340293    -0.340049  \n",
       "...             ...       ...          ...          ...  \n",
       "5999995.0  0.435555  1.665682     0.003934     0.004127  \n",
       "5999996.0 -0.730380 -0.866149    -0.340293    -0.340049  \n",
       "5999997.0 -0.294231 -0.224891    -0.340293    -0.340049  \n",
       "5999998.0 -0.660332 -0.695741    -0.324205    -0.323964  \n",
       "5999999.0 -0.532487 -0.556387    -0.255810    -0.255525  \n",
       "\n",
       "[45931 rows x 28 columns]"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Column-wise merge of the two frames prepared earlier in the notebook.\n",
    "# `no_year` and `average` are aligned on their index; sort=True asks pandas to\n",
    "# sort the row axis when the two indexes are not already aligned.\n",
    "encoded_all_data = pd.concat(\n",
    "    objs=[no_year, average],\n",
    "    axis='columns',\n",
    "    sort=True,\n",
    ")\n",
    "\n",
    "# Print the dtype / non-null summary, then render the merged frame itself.\n",
    "encoded_all_data.info()\n",
    "encoded_all_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Map each original (Chinese) column label to its ASCII/pinyin name.\n",
    "# Renaming by label, instead of assigning a positional list to `.columns`, keeps\n",
    "# every name attached to the right data even if the column order changes, and it\n",
    "# makes this cell safe to re-run: once the frame holds only the selected columns,\n",
    "# a 28-name positional assignment raises a length-mismatch ValueError, whereas\n",
    "# rename() simply finds nothing left to rename.\n",
    "# The pinyin spellings are kept exactly as before because they end up in the CSV header.\n",
    "column_name_map = {\n",
    "    '专利': 'zhuan_li',\n",
    "    '企业类型': 'qiye_leixing',\n",
    "    '区域': 'area',\n",
    "    '商标': 'shangbiao',\n",
    "    '控制人持股比例': 'kongguren_chigubili',\n",
    "    '控制人类型': 'kongzhiren_leixing',\n",
    "    '注册时间': 'regesiter_time',\n",
    "    '注册资本': 'regesiter_money',\n",
    "    '著作权': 'work_right',\n",
    "    '行业': 'field',\n",
    "    '主营业务收入': 'main_run_shouru',\n",
    "    '从业人数': 'worker_number',\n",
    "    '债权融资成本': 'zhaiquanrongzi_chengben',\n",
    "    '债权融资额度': 'zhaiquanrongzi_edu',\n",
    "    '内部融资和贸易融资成本': 'neiburongzi_and_maoyirongzi_chengben',\n",
    "    '内部融资和贸易融资额度': 'neiburongzi_and_maoyirongzi_edu',\n",
    "    '净利润': 'jinglirun',\n",
    "    '利润总额': 'lirunzonge',\n",
    "    '所有者权益合计': 'suoyouzhe_quanyiheji',\n",
    "    '纳税总额': 'nashui_zonge',\n",
    "    '股权融资成本': 'guquanrognzi_chengben',\n",
    "    '股权融资额度': 'guquanrognzi_edu',\n",
    "    '营业总收入': 'yingye_zongshouru',\n",
    "    '负债总额': 'fuzhai_zonge',\n",
    "    '资产总额': 'zichan_zonge',\n",
    "    '项目融资和政策融资成本': 'xiangmurongzi_and_zhengcerongzi_chengben',\n",
    "    '项目融资和政策融资额度': 'xiangmurongzi_and_zhengcerongzi_edu',\n",
    "}\n",
    "\n",
    "# Keep only `flag` (presumably the target label) plus these twelve financial columns.\n",
    "selected_columns = [\n",
    "    'flag',\n",
    "    'zhaiquanrongzi_chengben',\n",
    "    'zhaiquanrongzi_edu',\n",
    "    'neiburongzi_and_maoyirongzi_chengben',\n",
    "    'neiburongzi_and_maoyirongzi_edu',\n",
    "    'jinglirun',\n",
    "    'lirunzonge',\n",
    "    'suoyouzhe_quanyiheji',\n",
    "    'nashui_zonge',\n",
    "    'guquanrognzi_chengben',\n",
    "    'guquanrognzi_edu',\n",
    "    'yingye_zongshouru',\n",
    "    'fuzhai_zonge',\n",
    "]\n",
    "\n",
    "encoded_all_data = encoded_all_data.rename(columns=column_name_map)[selected_columns]\n",
    "\n",
    "# NOTE: the pairplot below is left disabled. It references base columns (zhuan_li,\n",
    "# area, ...) that the selection above drops, so it would have to run on the renamed\n",
    "# frame before the subset is taken.\n",
    "# sns.pairplot(encoded_all_data[['flag','zhuan_li','qiye_leixing','area','shangbiao','kongguren_chigubili','kongzhiren_leixing','regesiter_time','regesiter_money','work_right','field','main_run_shouru','worker_number']],hue='flag')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>flag</th>\n",
       "      <th>zhaiquanrongzi_chengben</th>\n",
       "      <th>zhaiquanrongzi_edu</th>\n",
       "      <th>neiburongzi_and_maoyirongzi_chengben</th>\n",
       "      <th>neiburongzi_and_maoyirongzi_edu</th>\n",
       "      <th>jinglirun</th>\n",
       "      <th>lirunzonge</th>\n",
       "      <th>suoyouzhe_quanyiheji</th>\n",
       "      <th>nashui_zonge</th>\n",
       "      <th>guquanrognzi_chengben</th>\n",
       "      <th>guquanrognzi_edu</th>\n",
       "      <th>yingye_zongshouru</th>\n",
       "      <th>fuzhai_zonge</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>28.0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>-0.376463</td>\n",
       "      <td>-0.376167</td>\n",
       "      <td>-0.226258</td>\n",
       "      <td>-0.226812</td>\n",
       "      <td>-0.391587</td>\n",
       "      <td>-0.247471</td>\n",
       "      <td>-0.004741</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>0.406379</td>\n",
       "      <td>0.405600</td>\n",
       "      <td>-0.338439</td>\n",
       "      <td>-0.399303</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>230.0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>-0.376463</td>\n",
       "      <td>-0.376167</td>\n",
       "      <td>-0.131425</td>\n",
       "      <td>-0.132086</td>\n",
       "      <td>-0.305196</td>\n",
       "      <td>-0.402976</td>\n",
       "      <td>0.106861</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>-0.232299</td>\n",
       "      <td>-0.232355</td>\n",
       "      <td>-0.547484</td>\n",
       "      <td>-0.371525</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>429.0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.422401</td>\n",
       "      <td>0.422863</td>\n",
       "      <td>-0.325154</td>\n",
       "      <td>-0.325293</td>\n",
       "      <td>-1.181800</td>\n",
       "      <td>0.637125</td>\n",
       "      <td>-0.461944</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>1.593755</td>\n",
       "      <td>1.592084</td>\n",
       "      <td>1.565990</td>\n",
       "      <td>1.380232</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>693.0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>-0.376463</td>\n",
       "      <td>-0.376167</td>\n",
       "      <td>-0.096611</td>\n",
       "      <td>-0.097367</td>\n",
       "      <td>-0.583686</td>\n",
       "      <td>0.199842</td>\n",
       "      <td>0.356249</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>0.278852</td>\n",
       "      <td>0.278391</td>\n",
       "      <td>0.119368</td>\n",
       "      <td>-0.410640</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>727.0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>-0.376463</td>\n",
       "      <td>-0.376167</td>\n",
       "      <td>0.276225</td>\n",
       "      <td>0.274462</td>\n",
       "      <td>-1.224502</td>\n",
       "      <td>0.836748</td>\n",
       "      <td>-1.434351</td>\n",
       "      <td>-0.463019</td>\n",
       "      <td>2.129373</td>\n",
       "      <td>2.129724</td>\n",
       "      <td>1.686916</td>\n",
       "      <td>2.114191</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5999995.0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>2.477889</td>\n",
       "      <td>2.478143</td>\n",
       "      <td>-0.325154</td>\n",
       "      <td>-0.325293</td>\n",
       "      <td>0.220857</td>\n",
       "      <td>1.231584</td>\n",
       "      <td>1.261654</td>\n",
       "      <td>1.235796</td>\n",
       "      <td>0.248468</td>\n",
       "      <td>0.247847</td>\n",
       "      <td>0.656345</td>\n",
       "      <td>0.435555</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5999996.0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.254723</td>\n",
       "      <td>-0.254376</td>\n",
       "      <td>-0.325154</td>\n",
       "      <td>-0.325293</td>\n",
       "      <td>-0.121522</td>\n",
       "      <td>-0.581308</td>\n",
       "      <td>0.239991</td>\n",
       "      <td>-0.356314</td>\n",
       "      <td>-0.176726</td>\n",
       "      <td>-0.176689</td>\n",
       "      <td>-0.696066</td>\n",
       "      <td>-0.730380</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5999997.0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.126307</td>\n",
       "      <td>0.126595</td>\n",
       "      <td>0.714218</td>\n",
       "      <td>0.710454</td>\n",
       "      <td>0.567215</td>\n",
       "      <td>0.303538</td>\n",
       "      <td>0.254127</td>\n",
       "      <td>0.852716</td>\n",
       "      <td>-0.285456</td>\n",
       "      <td>-0.285470</td>\n",
       "      <td>0.119244</td>\n",
       "      <td>-0.294231</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5999998.0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.376463</td>\n",
       "      <td>-0.376167</td>\n",
       "      <td>-0.146600</td>\n",
       "      <td>-0.255767</td>\n",
       "      <td>-0.131595</td>\n",
       "      <td>-0.511365</td>\n",
       "      <td>0.325045</td>\n",
       "      <td>-0.399178</td>\n",
       "      <td>-0.127856</td>\n",
       "      <td>-0.128049</td>\n",
       "      <td>-0.625198</td>\n",
       "      <td>-0.660332</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5999999.0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>-0.103286</td>\n",
       "      <td>-0.102876</td>\n",
       "      <td>-0.325154</td>\n",
       "      <td>-0.325293</td>\n",
       "      <td>-0.168026</td>\n",
       "      <td>-0.570797</td>\n",
       "      <td>0.268431</td>\n",
       "      <td>-0.369331</td>\n",
       "      <td>-0.164113</td>\n",
       "      <td>-0.163797</td>\n",
       "      <td>-0.681008</td>\n",
       "      <td>-0.532487</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>45931 rows × 13 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           flag  zhaiquanrongzi_chengben  zhaiquanrongzi_edu  \\\n",
       "ID                                                             \n",
       "28.0        1.0                -0.376463           -0.376167   \n",
       "230.0       1.0                -0.376463           -0.376167   \n",
       "429.0       1.0                 0.422401            0.422863   \n",
       "693.0       1.0                -0.376463           -0.376167   \n",
       "727.0       1.0                -0.376463           -0.376167   \n",
       "...         ...                      ...                 ...   \n",
       "5999995.0   0.0                 2.477889            2.478143   \n",
       "5999996.0   0.0                -0.254723           -0.254376   \n",
       "5999997.0   0.0                 0.126307            0.126595   \n",
       "5999998.0   0.0                -0.376463           -0.376167   \n",
       "5999999.0   0.0                -0.103286           -0.102876   \n",
       "\n",
       "           neiburongzi_and_maoyirongzi_chengben  \\\n",
       "ID                                                \n",
       "28.0                                  -0.226258   \n",
       "230.0                                 -0.131425   \n",
       "429.0                                 -0.325154   \n",
       "693.0                                 -0.096611   \n",
       "727.0                                  0.276225   \n",
       "...                                         ...   \n",
       "5999995.0                             -0.325154   \n",
       "5999996.0                             -0.325154   \n",
       "5999997.0                              0.714218   \n",
       "5999998.0                             -0.146600   \n",
       "5999999.0                             -0.325154   \n",
       "\n",
       "           neiburongzi_and_maoyirongzi_edu  jinglirun  lirunzonge  \\\n",
       "ID                                                                  \n",
       "28.0                             -0.226812  -0.391587   -0.247471   \n",
       "230.0                            -0.132086  -0.305196   -0.402976   \n",
       "429.0                            -0.325293  -1.181800    0.637125   \n",
       "693.0                            -0.097367  -0.583686    0.199842   \n",
       "727.0                             0.274462  -1.224502    0.836748   \n",
       "...                                    ...        ...         ...   \n",
       "5999995.0                        -0.325293   0.220857    1.231584   \n",
       "5999996.0                        -0.325293  -0.121522   -0.581308   \n",
       "5999997.0                         0.710454   0.567215    0.303538   \n",
       "5999998.0                        -0.255767  -0.131595   -0.511365   \n",
       "5999999.0                        -0.325293  -0.168026   -0.570797   \n",
       "\n",
       "           suoyouzhe_quanyiheji  nashui_zonge  guquanrognzi_chengben  \\\n",
       "ID                                                                     \n",
       "28.0                  -0.004741     -0.463019               0.406379   \n",
       "230.0                  0.106861     -0.463019              -0.232299   \n",
       "429.0                 -0.461944     -0.463019               1.593755   \n",
       "693.0                  0.356249     -0.463019               0.278852   \n",
       "727.0                 -1.434351     -0.463019               2.129373   \n",
       "...                         ...           ...                    ...   \n",
       "5999995.0              1.261654      1.235796               0.248468   \n",
       "5999996.0              0.239991     -0.356314              -0.176726   \n",
       "5999997.0              0.254127      0.852716              -0.285456   \n",
       "5999998.0              0.325045     -0.399178              -0.127856   \n",
       "5999999.0              0.268431     -0.369331              -0.164113   \n",
       "\n",
       "           guquanrognzi_edu  yingye_zongshouru  fuzhai_zonge  \n",
       "ID                                                            \n",
       "28.0               0.405600          -0.338439     -0.399303  \n",
       "230.0             -0.232355          -0.547484     -0.371525  \n",
       "429.0              1.592084           1.565990      1.380232  \n",
       "693.0              0.278391           0.119368     -0.410640  \n",
       "727.0              2.129724           1.686916      2.114191  \n",
       "...                     ...                ...           ...  \n",
       "5999995.0          0.247847           0.656345      0.435555  \n",
       "5999996.0         -0.176689          -0.696066     -0.730380  \n",
       "5999997.0         -0.285470           0.119244     -0.294231  \n",
       "5999998.0         -0.128049          -0.625198     -0.660332  \n",
       "5999999.0         -0.163797          -0.681008     -0.532487  \n",
       "\n",
       "[45931 rows x 13 columns]"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "encoded_all_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the selected feature table; with pandas defaults the index is written\n",
    "# as the first CSV column.\n",
    "encoded_all_data.to_csv(path_or_buf=r'./data/created_data/encoded_all_data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "758d65def5724aac8e26f23516c9038f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Tab(children=(HTML(value='<div id=\"overview-content\" class=\"row variable spacing\">\\n    <div class=\"row\">\\n   …"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Report generated with <a href=\"https://github.com/pandas-profiling/pandas-profiling\">pandas-profiling</a>."
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the profiling report once and keep a handle to it, so the export cell\n",
    "# below can reuse it instead of referencing an unbound name.\n",
    "profile = ppf.ProfileReport(encoded_all_data)\n",
    "profile"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Render the report built in the previous cell to an HTML string.\n",
    "# The result is bound to a name rather than left as the cell's last expression,\n",
    "# so the full HTML document is not dumped into the notebook output.\n",
    "profile_html = profile.to_html()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
